## Print the wall-clock time at which the script starts.
print(Sys.time())

## Keep strings as character vectors when data frames are built.
## FALSE is spelled out because `F` is an ordinary binding that can be reassigned.
options(stringsAsFactors = FALSE)

suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(xtable))

#### ALL
## load() brings `document_scores_election16` and `the_keywords` (used below)
## into the workspace.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_all_docs_only_replicate_psrm.RData")
## Drop the columns named X10..X99 and keep every other column.
## A logical mask is used instead of `-which(...)`: when no column matches,
## which() returns integer(0) and negative indexing with it selects ZERO
## columns instead of all of them.
all_docs <- document_scores_election16[, !(names(document_scores_election16) %in% paste0("X", 10:99))]
all_keywords <- the_keywords

#### Hashtag gamer trolls
## load() overwrites `document_scores_election16` and `the_keywords` with the
## versions stored in this cluster's file.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_HashtagGamer_docs_only_replicate_psrm.RData")
docs <- document_scores_election16
## Append "_hashtag" to the X0..X9 columns so they do not collide with the
## identically named columns already in all_docs. Each matched column keeps its
## own name plus the suffix, so labels stay correct even if the columns are not
## stored in X0..X9 order or some are absent.
names(docs) <- ifelse(
    names(docs) %in% paste0("X", 0:9),
    paste0(names(docs), "_hashtag"),
    names(docs)
)
## Full outer join on tweetid; userid is dropped from the right-hand side first.
all_docs <- merge(
    all_docs,
    docs %>% select(-userid),
    by = "tweetid",
    all = TRUE
)
hashtag_keywords <- the_keywords
#### Left trolls
## load() overwrites `document_scores_election16`, `the_keywords` and
## `the_keywords_alt_plus` with the versions stored in this cluster's file.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_LeftTroll_docs_only_replicate_psrm.RData")
docs <- document_scores_election16
## Append "_left" to the X0..X9 columns so they do not collide with the
## identically named columns already in all_docs. Each matched column keeps its
## own name plus the suffix, so labels stay correct even if the columns are not
## stored in X0..X9 order or some are absent.
names(docs) <- ifelse(
    names(docs) %in% paste0("X", 0:9),
    paste0(names(docs), "_left"),
    names(docs)
)
## Full outer join on tweetid; userid is dropped from the right-hand side first.
all_docs <- merge(
    all_docs,
    docs %>% select(-userid),
    by = "tweetid",
    all = TRUE
)
left_keywords <- the_keywords
## Kept separately because later loads replace `the_keywords_alt_plus`-style
## objects of the same name; it is used for the Figure 1 (bottom) keywords.
left_keywords_alt_plus <- the_keywords_alt_plus
#### News feed trolls
## load() overwrites `document_scores_election16` and `the_keywords` with the
## versions stored in this cluster's file.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_NewsFeed_docs_only_replicate_psrm.RData")
docs <- document_scores_election16
## Append "_news" to the X0..X9 columns so they do not collide with the
## identically named columns already in all_docs. Each matched column keeps its
## own name plus the suffix, so labels stay correct even if the columns are not
## stored in X0..X9 order or some are absent.
names(docs) <- ifelse(
    names(docs) %in% paste0("X", 0:9),
    paste0(names(docs), "_news"),
    names(docs)
)
## Full outer join on tweetid; userid is dropped from the right-hand side first.
all_docs <- merge(
    all_docs,
    docs %>% select(-userid),
    by = "tweetid",
    all = TRUE
)
news_keywords <- the_keywords
#### Right trolls
## load() overwrites `document_scores_election16` and `the_keywords` with the
## versions stored in this cluster's file.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_RightTroll_docs_only_replicate_psrm.RData")
docs <- document_scores_election16
## Append "_right" to the X0..X9 columns so they do not collide with the
## identically named columns already in all_docs. Each matched column keeps its
## own name plus the suffix, so labels stay correct even if the columns are not
## stored in X0..X9 order or some are absent.
names(docs) <- ifelse(
    names(docs) %in% paste0("X", 0:9),
    paste0(names(docs), "_right"),
    names(docs)
)
## Full outer join on tweetid; userid is dropped from the right-hand side first.
all_docs <- merge(
    all_docs,
    docs %>% select(-userid),
    by = "tweetid",
    all = TRUE
)
right_keywords <- the_keywords


## not_holdout_alt is TRUE for rows whose profile description contains no
## character with a code point in 1000..1999 AND whose account_language is "en".
## NOTE(review): the 1000..1999 range presumably targets Cyrillic and
## neighbouring scripts -- confirm against the paper's holdout definition.
## vapply() pins the per-row result to a single logical, so a zero-row input
## yields logical(0) instead of the list() that sapply() would return (which
## would make the `|` below fail).
all_docs$not_holdout_alt <- with(
    all_docs,
    !(vapply(
        user_profile_description,
        function(x) any(utf8ToInt(x) %in% 1000:1999),
        logical(1)
    ) | account_language != "en")       #linvill warren filter redundant here
)

## Add `user_mentions_unlisted`: strip the square brackets from each
## `user_mentions` string, then split what remains on ", ".
all_docs <- mutate(
    all_docs,
    user_mentions_unlisted = sapply(
        gsub("\\[|\\]", "", user_mentions),
        function(mentions) strsplit(mentions, ", ")
    )
)

## Write the merged document scores and the per-cluster keyword objects to a
## single combined .RData file.
save(
    list = c(
        "all_docs",
        "right_keywords", "news_keywords", "left_keywords",
        "hashtag_keywords", "all_keywords"
    ),
    file = "data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_docs_only_combined_replicate_psrm.RData"
)


## load(
##     ## all_docs,
##     ## right_keywords, news_keywords, left_keywords, hashtag_keywords, all_keywords,
##     file="data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_docs_only_combined_replicate_psrm.RData"
## )


## Print a LaTeX table (via xtable) of the leading keywords of selected
## dimensions.
##
## Arguments:
##   keywords        list of keyword tables, one element per dimension. Element
##                   [[1]] is dimension 0, a frequency dimension, which the
##                   default `select_ds` skips.
##   select_ds       indices of the elements of `keywords` to column-bind.
##   select_columns  columns of the column-bound table to print.
##   n_top           maximum number of leading rows to print (default 15).
##
## Returns the value of print() on the xtable object; the function is called
## for its printed output.
top2d_keywords <- function(keywords, select_ds = 2:3, select_columns = 1:4,
                           n_top = 15) {
    keywords_df <- do.call("cbind", keywords[select_ds])
    ## head() instead of `[1:15, ]`: with fewer than `n_top` rows, fixed
    ## indexing errors on a matrix ("subscript out of bounds") and pads a data
    ## frame with NA rows. head() returns the rows that exist and keeps the
    ## result two-dimensional.
    keywords_df <- head(keywords_df, n_top)
    keywords_xt <- xtable(
        data.frame(keywords_df[, select_columns])
    )
    print(keywords_xt, include.rownames = FALSE)
}

## Each table is preceded by a two-line banner naming the table or figure.
cat("\n#### ####", "\n#### Table A5\n", sep = "")
top2d_keywords(all_keywords)

## Figure 1 (top) prints single columns of the all-accounts table:
## column 3 for the conservative keywords, column 4 for the liberal ones.
cat("\n#### ####", "\n#### Figure 1 (top keywords, conservative)\n", sep = "")
top2d_keywords(all_keywords, select_columns = 3)
cat("\n#### ####", "\n#### Figure 1 (top keywords, liberal)\n", sep = "")
top2d_keywords(all_keywords, select_columns = 4)

cat("\n#### ####", "\n#### Table A6\n", sep = "")
top2d_keywords(left_keywords)

cat("\n#### ####", "\n#### Table A7\n", sep = "")
top2d_keywords(right_keywords)


## load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_LeftTroll_docs_only_replicate_psrm.RData")

## Figure 1 (bottom): `left_keywords_alt_plus` is wrapped in a one-element list
## so that select_ds = 1 picks it; column 1 is printed as the political
## keywords and column 2 as the non-political ones.
cat("\n#### ####", "\n#### Figure 1 (bottom keywords, political)\n", sep = "")
top2d_keywords(list(left_keywords_alt_plus), select_ds = 1, select_columns = 1)
cat("\n#### ####", "\n#### Figure 1 (bottom keywords, non-political)\n", sep = "")
top2d_keywords(list(left_keywords_alt_plus), select_ds = 1, select_columns = 2)


## Print a LaTeX table (via xtable) of keyword lists.
##
## Arguments (each defaults to the same-named object, without the leading dot,
## found in the calling workspace):
##   .mi_vocab_list_2016_v_2015            list with `pos` and `neg` elements;
##                                         their names() become the
##                                         "2016 words" and "2015 words" columns.
##   .mi_vocab_list_these_trolls_v_others  list with a `pos` element whose
##                                         names() become the cluster column,
##                                         or NA when there is no such
##                                         comparison.
##   .the_cluster                          label used in the cluster column
##                                         header ("<cluster> words").
##
## The cluster column is included only when
## `.mi_vocab_list_these_trolls_v_others` has at least one non-NA element.
## Returns the value of print() on the xtable object.
mi_keywords_tables <- function(
                               .mi_vocab_list_2016_v_2015 = mi_vocab_list_2016_v_2015,
                               .mi_vocab_list_these_trolls_v_others = mi_vocab_list_these_trolls_v_others,
                               .the_cluster = the_cluster
                               ) {
    ## check.names = FALSE keeps the headers exactly as written; the default
    ## would rewrite them to `X2016.words` / `X2015.words`, inconsistent with
    ## the cluster column header below, which keeps its space.
    df_16v15 <- data.frame(
        `2016 words` = names(.mi_vocab_list_2016_v_2015$pos),
        `2015 words` = names(.mi_vocab_list_2016_v_2015$neg),
        check.names = FALSE
    )
    if (any(!is.na(.mi_vocab_list_these_trolls_v_others))) {
        df_thesevothers <- data.frame(
            names(.mi_vocab_list_these_trolls_v_others$pos)
        )
        names(df_thesevothers) <- paste0(.the_cluster, " words")
        df_keywords <- cbind(df_thesevothers, df_16v15)
        print(xtable(df_keywords), include.rownames = FALSE)
    } else {
        print(xtable(df_16v15), include.rownames = FALSE)
    }
}

cat("\n#### ####", "\n#### Tables A12-A16\n", sep = "")
## One table per cluster: load that cluster's mutual-information keyword file
## into the workspace, then print its table from the freshly loaded objects.
for (the_cluster in c("all","LeftTroll","RightTroll","HashtagGamer","NewsFeed")) {
    load(
        file = paste0(
            "data/en_tweets_2009-05-09_to_2016-11-08_dimensions_election16_remove_news_",
            the_cluster,
            "_mi_keywords_replicate_psrm.RData"
        )
    )
    mi_keywords_tables(
        .mi_vocab_list_2016_v_2015 = mi_vocab_list_2016_v_2015,
        .mi_vocab_list_these_trolls_v_others = mi_vocab_list_these_trolls_v_others,
        .the_cluster = the_cluster
    )
}



## Boycott, Don't vote, Do not vote, Didn't vote, Sit out the election, Fuck the election, Do not support, Don't support, Can't support, Truth about election, Deserve our vote, Before you vote, Not voting, Vote illegally, Rigged.

## boycott, don't vote, do not vote, didn't vote, sit out the election, fuck the election, do not support, don't support, can't support, truth about election, deserve our vote, before you vote, not voting, vote illegally, rigged

## Print the wall-clock time at which the script finishes.
print(Sys.time())
